library(conText)
library(tidyverse)
library(ggthemes)
library(tidylog)
library(zoo)
library(tsibble)
library(lubridate)

# Inputs ----
# Tweet corpus, pre-trained GloVe embeddings, and the transformation matrix
# (presumably the a la carte / Khodak et al. matrix, judging by the file
# name -- confirm).
twts_corpus <- readRDS(file = "data/analysis/MPtweets_corpus.rds")
pre_trained <- readRDS(file = "data/wordembeddings/glove.rds")
transform_matrix <- readRDS(file = "data/wordembeddings/khodakA.rds")

# Time buckets ----
# Add three columns derived from `date`: a year-month and a year-week value
# (each converted back to Date so it can serve as a plain time index), plus
# the calendar year.
twts_corpus <- mutate(
  twts_corpus,
  yearmon = as.Date(as.yearmon(date)),
  yearwk = as.Date(yearweek(date)),
  year = year(date)
)

# Load the helper functions for time-series cosine similarities.
# The GitHub version of conText now provides this as get_seq_cos_sim(); the
# local utility function is used here instead and does the same thing.

source("utils.R")

# Cosine similarities over time ----
# Compare the target term "climate" with each candidate term, using the weekly
# time variable (`yearwk`) and the tweet text column.
# NOTE(review): "and" / "the" look like neutral baseline terms -- confirm.
cos_simsdf <- get_ts_cos_sim(
  x = twts_corpus,
  textvar = "tweet",
  timevar = "yearwk",
  target = "climate",
  candidates = c(
    "emergency", "crisis", "emergencies", "crises",
    "and", "the"
  ),
  pre_trained = pre_trained,
  transform_matrix = transform_matrix
)

# saveRDS(cos_simsdf, file = "data/analysis/cos_sims_tweets.rds")
